package ar.uba.fi.taller2.ftrs.parser;

import java.io.*;
import org.pdfbox.pdmodel.PDDocument;
import org.pdfbox.util.PDFTextStripper;

/**
 * Parser that extracts the plain text of a PDF file with PDFBox and hands it
 * to the text-parsing routine of the {@code Parser} superclass.
 */
public class PdfParser extends Parser{

	/** Path of the PDF file to read; set once in the constructor. */
	private final String fileName;

	/**
	 * Creates a parser for the given PDF file. The file is not opened here;
	 * it is only opened when one of the {@code parsear} methods is called.
	 *
	 * @param fileName path of the PDF file to parse
	 */
	PdfParser(String fileName){
		// Libraries for extracting text from PDF files in Java:
		//    *  http://schmidt.devlib.org/java/libraries-pdf.html
		//    *  http://java-source.net/open-source/pdf-libraries
		this.fileName = fileName;
	}

	/**
	 * Parses the PDF using {@code this.defaultLanguage}
	 * (presumably inherited from {@code Parser} -- not declared in this class).
	 *
	 * @throws Exception if the file cannot be loaded, its text cannot be
	 *                   extracted, or the superclass parse fails
	 */
	public void parsear() throws Exception{
		this.parsear(this.defaultLanguage);
	}

	/**
	 * Loads the PDF, extracts its text and delegates to
	 * {@code super.parsear(lang, text)}.
	 *
	 * @param lang language identifier forwarded unchanged to the superclass
	 * @throws Exception if the file cannot be loaded, its text cannot be
	 *                   extracted, the superclass parse fails, or the
	 *                   document cannot be closed
	 */
	public void parsear(String lang)throws Exception{
		PDFTextStripper pdfStripper = new PDFTextStripper();
		File pdfFile = new File(this.fileName);
		PDDocument pdfDoc = PDDocument.load(pdfFile);
		try{
			String text = pdfStripper.getText(pdfDoc);
			super.parsear(lang, text);
		}
		finally{
			// Always release the document's resources, even when extraction
			// or parsing throws; the original code never closed the document.
			pdfDoc.close();
		}
	}

	/**
	 * Manual smoke test: parses the hard-coded file {@code /tmp/test.pdf}
	 * and prints any exception to standard output.
	 *
	 * @param args ignored
	 */
	public static void main(String args[]){
		PdfParser pdf = new PdfParser("/tmp/test.pdf");
		try{
			pdf.parsear();
		}
		catch(Exception e){
			System.out.println(e);
		}
	}

}
